# General information

This file does the following:

1) Takes imports and exports data from census and groups the sectors to the final 1-14 sectors that we use.

2) Calculates the sector-region bilateral trade flows $X_{ij,k}$ when $i\in US\;  \&\;  j \notin US$ (exports from each state to each country) and $i\notin US\;  \&\;  j \in US$ (imports from each country to each state), for all sectors $k$ (except services).

## Input files

1. `0-Raw_Data/regions.csv`
2. `0-Raw_Data/sectors.csv`
3. `0-Raw_Data//State-Exports-Imports//State Exports by NAICS Commodities-i.csv`
4. `0-Raw_Data//State-Exports-Imports//State Imports by NAICS Commodities-i.csv`
5. `1-Intermediate_Processed_Data/country_country_step_.csv` 

## Output files

1. `1-Intermediate_Processed_Data//census_exports.csv`
2. `1-Intermediate_Processed_Data//census_imports.csv`
3. `1-Intermediate_Processed_Data/state_imports_exports.csv`
4. `1-Intermediate_Processed_Data//state_country_step_y_.csv`
5. `1-Intermediate_Processed_Data//state_country_step_e_.csv`


```{r setup, include=FALSE}
knitr::opts_knit$set(root.dir = normalizePath(".."))
remove(list = ls())
```

# Exports and imports (data)

## Importing datasets to R

```{r data, warning=FALSE}
#sector reclassification table
reclass_sectors <- read.csv(paste("0-Raw_Data/sectors.csv", sep=""), header = TRUE, sep = ",", dec = ".")
reclass_sectors <- reclass_sectors %>% 
  filter(is.na(naics) == FALSE) %>%
  dplyr::select(final_sector, naics)

#Exports by year.
base_e <- c()
for (i in 1:5){
temp <- read.csv(paste("0-Raw_Data//State-Exports-Imports//State Exports by NAICS Commodities-",i,".csv", sep=""), skip = 3, header = FALSE, sep = ",", dec = ".")
base_e <- rbind(base_e, temp)
}

#Imports by year
base_i <- c()
for (i in 1:5){
temp <- read.csv(paste("0-Raw_Data//State-Exports-Imports//State Imports by NAICS Commodities-",i,".csv", sep=""), skip = 3, header = FALSE, sep = ",", dec = ".")
base_i <- rbind(base_i, temp)
}

# Unifying .csv files for exports and imports.

colnames(base_e) <- c("state", "naics3", "country", "year", "exports")
colnames(base_i) <- c("state", "naics3", "country", "year", "imports")

base_e <- base_e %>%
  arrange(year, state) %>%
  dplyr::select(year, state, naics3, country, everything())
base_e$exports <- as.numeric(gsub(",","",base_e$exports))
base_i <- base_i %>%
  arrange(year, state) %>%
  dplyr::select(year, state, naics3, country, everything())
base_i$imports <- as.numeric(gsub(",","",base_i$imports))

#Saving as .csv
write.table(base_e, file = paste("1-Intermediate_Processed_Data//census_exports.csv", sep=""), sep = ",", row.names = FALSE)
write.table(base_i, file = paste("1-Intermediate_Processed_Data//census_imports.csv", sep=""), sep = ",", row.names = FALSE)

```


## Mapping to our relevant 1-14 sectors

```{r}
#Listing our countries of interest, and their respective ISO codes.
list_countries <- c("Australia", "Austria", "Belgium", "Brazil", "Bulgaria", "Canada", "China", "Cyprus", "Czech Republic", "Denmark", "Estonia", "Finland", "France", "United Kingdom", "Germany", "Greece", "Hungary", "India", "Indonesia", "Ireland", "Italy", "Japan", "Korea, South", "Lithuania", "Mexico", "Netherlands", "Poland", "Portugal", "Romania", "Russia", "Slovakia", "Slovenia", "Spain", "Sweden", "Taiwan", "Turkey", "Rest of world")
list_codes <- c("AUS" , "AUT" , "BEL" , "BRA", "BGR", "CAN" , "CHN", "CYP", "CZE", "DNK", "EST", "FIN", "FRA", "GBR", "DEU", "GRC", "HUN", "IND", "IDN", "IRL", "ITA", "JPN", "KOR", "LTU", "MEX", "NLD", "POL", "PRT", "ROU", "RUS", "SVK", "SVN", "ESP", "SWE", "TWN", "TUR", "RoW")
list <- data.frame(cbind(list_countries, list_codes))
list$list_countries <- as.character(list$list_countries)
list$list_codes <- as.character(list$list_codes)
final <- c()


for (type in c("exports", "imports")){
base <- read.csv(paste("1-Intermediate_Processed_Data//census_", type, ".csv", sep=""), header = TRUE, sep = ",", dec = ".")  
colnames(base) <- c("year", "state", "naics3", "country", "value") 
base <- base %>%
  #Only states.
  filter(state!="Dist of Columbia" & state!="Puerto Rico" & state!="US Virgin Islands" & state!="Unknown") %>%
  #Only countries.
  filter(country!="APEC - Asia Pacific Economic Co-operation" & country!="ASEAN - Association of Southeast Asian Nations" & country!="Asia" & country!="Australia and Oceania" & country!="Central American Common Market" & country!="Euro Area" & country!="Europe" & country!="European Union" & country!="LAFTA - Latin American Free Trade Association" & country!="NATO (North Atlantic Treaty Organization) Allies" & country!="NICS - Newly Industrialized Countries" & country!="OECD - Organization for Economic Co-operation and Development" & country!="OPEC - Organization of Petroleum Exporting Countries" & country!="North America" & country!="Pacific Rim Countries" & country!="South/Central America" & country!="Twenty Latin American Republics" & country!="US Trade Agreements Partners" & country!="Western Sahara") %>%
  mutate(country = as.character(country)) %>%
  #Countries that are not in our list are going to be included in RoW.
  mutate(country = ifelse(country %in% list_countries, as.character(country), "Rest of world")) %>%
  left_join(list, by=c('country'='list_countries')) %>% 
  dplyr::rename(region = list_codes) %>%
  dplyr::select(-country) %>%
  group_by(year, state, naics3, region) %>%
  mutate(value = sum(value)) %>%
  ungroup() %>%
  distinct(year, state, naics3, region, .keep_all = TRUE)
#Remember that we are calculating exports from each state to each country.
if (type == "exports"){
base <- base %>%
    dplyr::rename(origin = state) %>%
    dplyr::rename(destination = region)
} 
#And imports from each country to each state.
if (type == "imports"){
base <- base %>%
    dplyr::rename(destination = state) %>%
    dplyr::rename(origin = region)
}  

#Changing sectors from NAICS to final (only sending sector).
base <- base %>%
    filter(naics3 != "All Commodities") %>%
    mutate(sec_naics = as.numeric(substr(x=naics3, start=1, stop=3))) %>%
    left_join(reclass_sectors, by=c('sec_naics'='naics')) %>% 
    dplyr::rename(sector = final_sector) %>%
    dplyr::select(-naics3, -sec_naics) %>%
    filter(is.na(sector) == FALSE) %>%
    group_by(year, sector, destination, origin) %>%
    mutate(value = sum(value)) %>%
    ungroup() %>%
    distinct(year, sector, destination, origin, .keep_all = TRUE)
#ID for "exports from each state to each country".
if (type == "exports"){
base <- base %>%
    mutate(file="i in US, j not in US")
} 
#ID for "imports from each country to each state".
if (type =="imports"){
base <- base %>%
    mutate(file = "i not in US, j in US")
}  
base <- base %>% arrange(file, year, sector, destination)
base <- base %>% dplyr::select(origin, year, destination, sector, value, file)
base$origin <- gsub(" ", "", base$origin, fixed = TRUE)
base$destination <- gsub(" ", "", base$destination, fixed = TRUE)
final <- rbind(final, base)  
}

write.table(final, file = paste("1-Intermediate_Processed_Data/state_imports_exports.csv", sep=""), sep = ",", row.names = FALSE)

#Reclassification table
#1.	(NAICS 311-312) Food Products, Beverage, and Tobacco Products (c3);
#2.	(NAICS 313-316) Textile, Textile Product Mills, Apparel, Leather, and Allied Products (c4-c5);
#3.	(NAICS 321-323) Wood Products, Paper, Printing, and Related Sup- port Activities (c6-c7); 
#4.	(NAICS 211-213, 324) Petroleum and Coal Products (c8);
#Mining, Quarrying, and Oil and Gas Extraction (c2). 
#5.	(NAICS 325) Chemical (c9);
#6.	(NAICS 326) Plastics and Rubber Products (c10); 
#7.	(NAICS 327) Nonmetallic Mineral Products (c11); 
#8.	(NAICS 331-332) Primary Metal and Fabricated Metal Products (c12); 
#9.	(NAICS 333); Machinery (c13); 
#10.	(NAICS 334-335) Computer and Electronic Products, and Electrical Equipment and Appliances (c14);
#11.	(NAICS 336) Transportation Equipment (c15); 
#12.	(NAICS 337-339) Furniture and Related Products, and Miscellaneous Manufacturing (c16); 
#13.	((NAICS 23) Construction (c18); 
#14.	(NAICS 42-45) Wholesale and Retail Trade (c19-c21); 
#15.	(NAICS 481-488) Transport Services (c23-c26);
#16.	(NAICS 511-518) Information Services (c27);
#17.	(NAICS 521-525) Finance and Insurance (c28); 
#18.	(NAICS 531-533)  Real Estate (c29-c30); 
#19.	(NAICS 61) Education (c32); 
#20.	(NAICS 621-624) Health Care (c33);
#21.	(NAICS 721-722) Accommodation and Food Services (c22); 
#22.	(NAICS 493, 541, 55, 561, 562, 711-713, 811-814) Other Services (c34).
#23.	(NAICS 111-115) Agriculture, Forestry, Fishing, and Hunting (c1)  

```


# Calculating the sector-country bilateral trade flows

Here we compute the sector-country bilateral trade flows ($X_{ij,k}$) when $i\in US\;  \&\;  j \notin US$ (exports from each state to each country) and $i\notin US\;  \&\;  j \in US$ (imports from each country to each state), for all sectors $k$ (except services).

We use $X_{ij,k}^{Census}$ and $X_{ij,k}^{WIOD}$ with a proportionality rule to calculate $X_{ij,k}$ for $i\in US\;  \&\;  j \notin US$ (exports from each State to each country) and $i\notin US\;  \&\;  j \in US$ (imports from each country to each State), and $\forall k$ (except services). 
We do the same procedure for each year. Census data on exports and imports starts in 2002 and 2008, respectively. For the years before the starting Census year we assume that the share of sector-level imports (exports) of each state from (to) each country in the total imports (exports) of the state in each year remains constant at the level of the starting Census year.

Define the share of exports of US State $i$ in sector $k$, going to country $j$ as:

$$y_{ij,k}^{Census}\equiv\dfrac{X_{ij,k}^{Census}}{\sum_{h\in US}X_{hj,k}^{Census}} \; \forall i\in US \, , \, j\notin US.$$ 

Similarly, only for $j \in US$, define the share of imports of State $j$ in sector $k$, coming from country $i$ as

$$e_{ij,k}^{Census}\equiv\dfrac{X_{ij,k}^{Census}}{\sum_{l\in US}X_{il,k}^{Census}}. \; \forall i\notin US \, , \, j\in US.$$ 

There are some very few cases that require corrections. If country j (i) did not import (export) from (to) the US in a given sector $k$ according to CENSUS data but it did it according to WIOD data. Then we would calculate $e_{ij,k}$ ($y_{ij,k}$) by splitting WIOD imports (exports) between the states according to the share of imports (exports) of that state in the total US imports (exports) of that sector $k$. This means:

$$
y_{i\;j,k}^{census}\equiv\dfrac{\sum_{j\notin US}X_{ij,k}^{census}}{\sum_{h\in US}\sum_{l\notin US}X_{hl,k}^{census}}, \; i\in US \, , \, j\notin US
$$

$$
e_{i\;j,k}^{census}\equiv\dfrac{\sum_{i\notin US}X_{ij,k}^{census}}{\sum_{h\notin US}\sum_{l\in US}X_{hl,k}^{census}}, \; i\notin US \, , \, j\in US
$$

Finally, we define our main variable of interest $X_{ij,k}\quad\forall k$: 

$$X_{ij,k}=
\begin{cases}
e_{ij,k}^{Census}X_{i\,US,k}^{WIOD} \quad \forall i\notin US, \; \forall j\in US\\
y_{ij,k}^{Census}X_{US\,j,k}^{WIOD} \quad \forall i\in US, \; \forall j\notin US
\end{cases} $$

## Importing datasets to R

```{r}
#state level imports and exports
state_imp_exp_0 <- read.csv("1-Intermediate_Processed_Data//state_imports_exports.csv")
state_imp_exp_0 <- state_imp_exp_0 %>% filter(origin != "AllStates", destination != "AllStates")

#WIOD
wiod_base <- c()
for (yr in 2000:2007) {
  wiod <- read.csv(paste('1-Intermediate_Processed_Data//country_country_step_', yr, '.csv', sep= ""), header = TRUE, sep = ",", dec = ".")
  wiod_base <- rbind(wiod_base, wiod)
}

#Countries' names.
regions <- read.csv("0-Raw_Data/regions.csv")
c_names <- regions %>% filter(status=="country", region!="USA")
c_names <- c_names$region
#States' names.
s_names <- regions %>% filter(status=="state") 
s_names <- s_names$region
#Number of states and number of countries.
n_s <- length(s_names)
n_c <- length(c_names)
#Number of sectors minus 3.
reclass_sectors <- read.csv("0-Raw_Data/sectors.csv")
n_sec <- length(unique(reclass_sectors$final_sector)) - 3
#Small value.
epsilon <- 0.0001;

```


## States' imports from countries

```{r}
# Base year for imports (2008)
state_imp_exp <- data.frame(state_imp_exp_0) %>% 
  #select imports from each country to each state.
  filter(year == 2008, file == "i not in US, j in US") %>%
  dplyr::select(-year, -file)
#Reshape to wide.
state_imp_exp <-spread(state_imp_exp, origin, value, fill = NA, convert = TRUE,  drop = TRUE, sep = NULL)
base_e <- state_imp_exp %>% 
  arrange(sector, destination) %>%
  #Excluding services.
  filter(sector <= n_sec | sector == (n_sec + 2))
#Rename columns and replace missing values with zeros.
colnames(base_e) <- c("importer", "sector", c_names)
base_e[is.na(base_e)] <- 0

for (yr in 2000:2007) {
#WIOD
wiod <- subset(wiod_base, year == yr)
wiod <- subset(wiod, select = -year) 
df_wiod <- data.frame(wiod)
df_wiod <- df_wiod %>%
  #Exclude services and focus on USA as importer.
  filter(sector <= n_sec | sector == (n_sec + 2)) %>%
  filter(importer == 'USA')
#Exclude USA from the group of exporters.
df_wiod1 <- df_wiod %>% dplyr::select(-USA, -importer)
colnames(df_wiod1) <- c("sector", c_names)
df_wiod1 <- melt(df_wiod1, id.vars=c("sector"), variable.name = "origin", value.name = "value")
df_wiod1 <- df_wiod1 %>% arrange(sector, origin)

#Reshape imports data-set to have countries in columns instead of states.This step eases the calculation of the shares of imports.
eijkBase <- melt(base_e, id.vars=c("importer", "sector"), variable.name="origin", value.name = "value")
eijkBase <-spread(eijkBase, importer, value, fill = NA, convert = TRUE,  drop = TRUE, sep = NULL)
#Only leave values (not ID columns) and calculate e_ijk.
eijkBase <- eijkBase[, 3:dim(eijkBase)[2]] / rowSums(eijkBase[, 3:dim(eijkBase)[2]]) 

#Calculate X_{ij,k}.
step3e <- eijkBase*df_wiod1$value
step3e <- cbind(df_wiod1[, 1:2], step3e)
################################################################################
#Applying proportionality:

#The sum over states of X_{ij,k}, ideally, would sum up to X_{i,US,k} as reported in WIOD. To correct the sectors where that is not the case, we calculate a vector of the differences between the sum over states of X_{ij,k} and X_{i,US,k}.
check <- as.matrix(rowSums(step3e[, 3:dim(step3e)[2]]) - df_wiod1$value)
#Final X_{ij,k}'s calculated as explained above (we will only substitute them for the original if it is necessary).
m_correct <- c()
for (sec in c(1:(n_sec), (n_sec + 2))) {
m <- base_e %>% filter(sector == sec)
m <- (rowSums(m[, 3:dim(m)[2]]) / sum(m[, 3:dim(m)[2]]))
m <- t(matrix(replicate(n_c, m), nrow = length(m), ncol = n_c))
m_correct <- rbind(m_correct, m) 
}
m_correct <- m_correct*df_wiod1$value
#Substitute with corrected X_{ij,k}'s.
for (r in 1:(dim(check)[1])) {
  if(is.na(check[r]) == TRUE){step3e[r, 3:dim(step3e)[2]] = m.correct[r, ]}
}
#If, after correcting for missing values, the sum over states does not coincide with WIOD up to a small difference (tolerance level), the loop stops and prints an error message.
if(sum(abs(rowSums(step3e[, 3:dim(step3e)[2]])/df_wiod1$value -1))>epsilon) 
   stop(paste("states' imports from countries != WIOD US imports from countries for", yr, " value=", sum(abs(rowSums(step3e[, 3:dim(step3e)[2]])/df_wiod1$value -1))))
################################################################################
#Reshape to have importers/states in a single column.
step3e <- melt(step3e, id.vars=c("origin", "sector"), variable.name="importer", value.name = "value")
step3e <-spread(step3e, origin, value, fill = NA, convert = TRUE,  drop = TRUE, sep = NULL)
step3e <- step3e %>%
  mutate(year = yr) %>%
  dplyr::select(year, importer, sector, everything())

  #Exporting data-sets
write.table(step3e, file = paste("1-Intermediate_Processed_Data//state_country_step_e_", yr,".csv", sep=""), sep = ",", row.names = FALSE)
}

```


## States' exports to countries

```{r}

for (yr in 2000:2007) {

#WIOD
wiod <- subset(wiod_base, year == yr)
wiod <- subset(wiod, select = -year) 
df_wiod <- data.frame(wiod)
df_wiod <- df_wiod %>%
  #Exclude services.
  filter(sector <= n_sec | sector == (n_sec + 2)) %>%
  #Exclude USA from the group of importers.
  filter(importer != 'USA') 
df_wiod1 <- df_wiod %>% dplyr::select(importer, sector, USA)

#Base year for exports (2002)
yr. <- yr
if(yr < 2002){yr. = 2002}
state_imp_exp <- data.frame(state_imp_exp_0) %>% 
  #select exports from each state to each country
  filter(year == yr., file == "i in US, j not in US") %>%
  dplyr::select(-year, -file)
#Reshape to wide.
state_imp_exp <- spread(state_imp_exp, origin, value, fill = NA, convert = TRUE,  drop = TRUE, sep = NULL)

# Adding rows if theres is no trade info for country-sector 
temp_names_1 <- as.character(unique(state_imp_exp$destination))
for (i in temp_names_1) {
  for (sec in c(1:12, 14)) {
    temp_1 <- state_imp_exp %>% filter(destination == i, sector == sec)
    if(dim(temp_1)[1] == 0){
      row <- c(i, sec, matrix(0, nrow = 1, ncol = n_s))
      state_imp_exp <- rbind(state_imp_exp, row)}
    }
}

state_imp_exp$sector <- as.numeric(state_imp_exp$sector)
base_y <- state_imp_exp %>% 
  arrange(sector, destination) %>%
  #Excluding services.
  filter(sector <= n_sec | sector == (n_sec + 2))
#Rename columns and replace missing values with zeros.
colnames(base_y) = c("importer", "sector", s_names)
base_y[is.na(base_y)] <- 0
base_y <- cbind(base_y[, 1:2], mapply(base_y[, 3:dim(base_y)[2]], FUN=as.numeric))

#Only leave values (not ID columns) and calculate y_ijk.
yijkBase <- base_y[, 3:dim(base_y)[2]] / rowSums(base_y[, 3:dim(base_y)[2]]) 

#Calculate X_{ij,k}.
step3y <- yijkBase*df_wiod1$USA
step3y <- cbind(df_wiod1[, 1:2], step3y)

################################################################################
#Applying proportionality:

#The sum over states of X_{ij,k}, ideally, would sum up to X_{US,j,k} as reported in WIOD. To correct the sectors where that is not the case, we calculate a vector of the differences between the sum over states of X_{ij,k} and X_{US,j,k}.
check <- as.matrix(rowSums(step3y[, 3:dim(step3y)[2]]) - df_wiod1$USA)

base_y1 <- melt(base_y, id.vars=c("importer", "sector"), variable.name="origin", value.name = "value")
base_y1 <-spread(base_y1, importer, value, fill = NA, convert = TRUE,  drop = TRUE, sep = NULL)
#Corrected X_{ij,k}'s calculated as explained above (we will only substitute them for the original if it is necessary).
m_correct <- c()
for (sec in c(1:(n_sec), (n_sec + 2))) {
m <- base_y1 %>% filter(sector == sec)
m <- (rowSums(m[, 3:dim(m)[2]]) / sum(m[, 3:dim(m)[2]]))
m <- t(matrix(replicate(n_c, m), nrow = length(m), ncol = n_c))
m_correct <- rbind(m_correct, m) 
}
m_correct <- m_correct*df_wiod1$USA
#Substitute with corrected X_{ij,k}'s.
for (r in 1:(dim(check)[1])) {
  if(is.na(check[r]) == TRUE){step3y[r, 3:dim(step3y)[2]] = m_correct[r, ]}
}
#If, after correcting for missing values, the sum over states does not coincide with WIOD up to a small difference (tolerance level), the loop stops and prints an error message.
if(sum(abs(rowSums(step3y[, 3:dim(step3y)[2]])/df_wiod1$USA -1))>epsilon) 
   stop(paste("states' exports to countries != WIOD US exports to countries for", yr, " value=", sum(abs(rowSums(step3y[, 3:dim(step3y)[2]])/df_wiod1$USA -1))))
################################################################################

step3y <- step3y %>%
  mutate(year = yr) %>%
  dplyr::select(year, importer, sector, everything())
colnames(step3y) <- c("year", "importer", "sector", s_names)

#Exporting data-sets.
write.table(step3y, file = paste("1-Intermediate_Processed_Data//state_country_step_y_", yr,".csv", sep=""), sep = ",", row.names = FALSE)
}
```

